### Replication code
### Article: "Turnover: How lame-duck governments disrupt the bureaucracy and service delivery before leaving office"
### Author: Guillermo Toral (www.guillermotoral.com)
### Date: July 2023
### This file performs descriptive analyses, generating plots, tables and statistics
### This file uses the analysis dataset created by prepare_data_merge_datasets.R, the mpsp_news_coded.csv created by prepare_data_mpsp_news.R together with hand coding of news reports, and the transitions_around_the_world.csv created by hand
### R version, platform, and package versions reported at the end of the file

# Prepare the environment -------------------------------------------------

### This section of the code prepares the environment 

# Clean the environment
rm(list = ls())

# Install required packages if not previously installed
package_list <- c("tidyverse", "here", "texreg", "xtable", "lmtest") 
packages_to_install <- package_list[!(package_list %in% installed.packages()[,"Package"])]
if(length(packages_to_install)>0){
  install.packages(packages_to_install)
}

# Load required packages
library(tidyverse)
library(here)
library(texreg)
library(xtable)
library(lmtest)

# Set Working Directory to wherever this file is located.
setwd(here())
# The directory where the code folder is located must also have a "plots" and a "tables" subdirectory

# Load datasets -----------------------------------------------------------

# Brazilian municipalities across election cycles
e <- read_csv("../../datasets/analysis/analysis_dataset_all_employees.csv")

# transitions_around_the_world, built by hand
t <- read_csv("../../datasets/analysis/transitions_around_the_world.csv") # Load transition periods datasets

# news reports from the site of the Sao Paulo Prosecutor's Office
# This dataset corresponds to the one produced by the code "prepare_data_mpsp_news.R", plus new columns 
# with codings by news piece done by a research assistant and the author
n <- read_csv("../../datasets/analysis/mpsp_news_coded.csv") 

# Generate Figure 1 (length of transition periods around the world) ---------------

pdf("../../plots/transition_periods.pdf",width=9,height=6)
par(mar = c(5.1, 10.1, 1.1, 2.1))
barplot(t$transition_length, 
        names.arg=paste0(t$country, " ", t$year), 
        horiz=T, offset=0, axisnames=T, col="black", las=2,
        xlab="Number of days between election day and the winner's first day in office")
grid()
barplot(t$transition_length,names.arg=paste0(t$country, " ", t$year), horiz=T, offset=0,axisnames=T,
        col="black",xlab="Number of days between election day and the winner's first day in office",las=2,add=T)
dev.off()

# Generate statistics on news reports from the São Paulo Prosecutor's Office --------

nr <- n %>%
  filter(as.numeric(str_sub(date, -2,-1))<23) %>% # keep only news reports from up to 2022
  filter(relevant==1) # keep only news reports that I identify as relevant (related to former mayors and violations of public employment laws)

dim(n)
dim(nr)

mean(nr$conviction=="Y")
mean(nr$fine=="Y")
mean(nr$political_rights=="Y")
mean(nr$loss_office=="Y")
mean(nr$assets_blocked=="Y")

# Generate Figure A.3 (outcome means on bureaucratic turnover, by election result) --------

r1 <- c(mean(e[which(e$challenger_margin>0),"fires_tem_q15"][[1]],na.rm=T), mean(e[which(e$challenger_margin<0),"fires_tem_q15"][[1]],na.rm=T),15, "Fires temp")
r2 <- c(mean(e[which(e$challenger_margin>0),"fires_tem_q16"][[1]],na.rm=T), mean(e[which(e$challenger_margin<0),"fires_tem_q16"][[1]],na.rm=T),16, "Fires temp")
r3 <- c(mean(e[which(e$challenger_margin>0),"fires_tem_q1"][[1]],na.rm=T), mean(e[which(e$challenger_margin<0),"fires_tem_q1"][[1]],na.rm=T),1, "Fires temp")

r4 <- c(mean(e[which(e$challenger_margin>0),"hires_tem_q15"][[1]],na.rm=T), mean(e[which(e$challenger_margin<0),"hires_tem_q15"][[1]],na.rm=T),15, "Hires temp")
r5 <- c(mean(e[which(e$challenger_margin>0),"hires_tem_q16"][[1]],na.rm=T), mean(e[which(e$challenger_margin<0),"hires_tem_q16"][[1]],na.rm=T),16, "Hires temp")
r6 <- c(mean(e[which(e$challenger_margin>0),"hires_tem_q1"][[1]],na.rm=T), mean(e[which(e$challenger_margin<0),"hires_tem_q1"][[1]],na.rm=T),1, "Hires temp")

r7 <- c(mean(e[which(e$challenger_margin>0),"resignations_tem_q15"][[1]],na.rm=T), mean(e[which(e$challenger_margin<0),"resignations_tem_q15"][[1]],na.rm=T),15, "Resignations temp")
r8 <- c(mean(e[which(e$challenger_margin>0),"resignations_tem_q16"][[1]],na.rm=T), mean(e[which(e$challenger_margin<0),"resignations_tem_q16"][[1]],na.rm=T),16, "Resignations temp")
r9 <- c(mean(e[which(e$challenger_margin>0),"resignations_tem_q1"][[1]],na.rm=T), mean(e[which(e$challenger_margin<0),"resignations_tem_q1"][[1]],na.rm=T),1, "Resignations temp")

r10 <- c(mean(e[which(e$challenger_margin>0),"fires_con_q15"][[1]],na.rm=T), mean(e[which(e$challenger_margin<0),"fires_con_q15"][[1]],na.rm=T),15, "Fires conc")
r11 <- c(mean(e[which(e$challenger_margin>0),"fires_con_q16"][[1]],na.rm=T), mean(e[which(e$challenger_margin<0),"fires_con_q16"][[1]],na.rm=T),16, "Fires conc")
r12 <- c(mean(e[which(e$challenger_margin>0),"fires_con_q1"][[1]],na.rm=T), mean(e[which(e$challenger_margin<0),"fires_con_q1"][[1]],na.rm=T),1, "Fires conc")

r13 <- c(mean(e[which(e$challenger_margin>0),"hires_con_q15"][[1]],na.rm=T), mean(e[which(e$challenger_margin<0),"hires_con_q15"][[1]],na.rm=T),15, "Hires conc")
r14 <- c(mean(e[which(e$challenger_margin>0),"hires_con_q16"][[1]],na.rm=T), mean(e[which(e$challenger_margin<0),"hires_con_q16"][[1]],na.rm=T),16, "Hires conc")
r15 <- c(mean(e[which(e$challenger_margin>0),"hires_con_q1"][[1]],na.rm=T), mean(e[which(e$challenger_margin<0),"hires_con_q1"][[1]],na.rm=T),1, "Hires conc")

r16 <- c(mean(e[which(e$challenger_margin>0),"resignations_con_q15"][[1]],na.rm=T), mean(e[which(e$challenger_margin<0),"resignations_con_q15"][[1]],na.rm=T),15, "Resignations conc")
r17 <- c(mean(e[which(e$challenger_margin>0),"resignations_con_q16"][[1]],na.rm=T), mean(e[which(e$challenger_margin<0),"resignations_con_q16"][[1]],na.rm=T),16, "Resignations conc")
r18 <- c(mean(e[which(e$challenger_margin>0),"resignations_con_q1"][[1]],na.rm=T), mean(e[which(e$challenger_margin<0),"resignations_con_q1"][[1]],na.rm=T),1, "Resignations conc")

table <- as.tibble(rbind(r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15, r16, r17, r18))
colnames(table) <- c("Treatment", "Control", "Quarter", "Variable")
table$period <- rep(c(1:3),6)

t1 <- as.tibble(table[c(1:3),])
t2 <- as.tibble(table[c(4:6),])
t3 <- as.tibble(table[c(7:9),])
t4 <- as.tibble(table[c(10:12),])
t5 <- as.tibble(table[c(13:15),])
t6 <- as.tibble(table[c(16:18),])

pdf("../../plots/timeseries_employment.pdf", width=9, height=6)
par(mfrow=c(2,3))
# Dismissals temp
par(mar=c(4.1, 4.1, 4.1, 2.1))
plot(t1$period, t1$Treatment, type="o", ylab="Mean", xlab="Quarter", xaxt="n", ylim=c(0,40), lwd=2, lty=3, main="Dismissals of \n temporaries",cex.main=1.5)
lines(t1$period, t1$Control, type="o", lwd=2)
axis(side=1, at=c(1:3),labels=c("Q15", "Q16", "Q1"))
# Hires temp
plot(t2$period, t2$Treatment, type="o", ylab="Mean", xlab="Quarter", xaxt="n", ylim=c(0,160), lwd=2, lty=3, main="Hires of \n temporaries",cex.main=1.5)
lines(t2$period, t2$Control, type="o", lwd=2)
axis(side=1, at=c(1:3),labels=c("Q15", "Q16", "Q1"))
# Resignations temp
plot(t3$period, t3$Treatment, type="o", ylab="Mean", xlab="Quarter", xaxt="n", ylim=c(0,10), lwd=2, lty=3, main="Resignations of \n temporaries",cex.main=1.5)
lines(t3$period, t3$Control, type="o", lwd=2)
axis(side=1, at=c(1:3),labels=c("Q15", "Q16", "Q1"))
# Dismissals conc
plot(t4$period, t4$Treatment, type="o", ylab="Mean", xlab="Quarter", xaxt="n", ylim=c(0,1), lwd=2, lty=3, main="Dismissals of \n civil servants",cex.main=1.5)
lines(t4$period, t4$Control, type="o", lwd=2)
axis(side=1, at=c(1:3),labels=c("Q15", "Q16", "Q1"))
# Hires conc
plot(t5$period, t5$Treatment, type="o", ylab="Mean", xlab="Quarter", xaxt="n", ylim=c(0,50), lwd=2, lty=3, main="Hires of \n civil servants",cex.main=1.5)
lines(t5$period, t5$Control, type="o", lwd=2)
axis(side=1, at=c(1:3),labels=c("Q15", "Q16", "Q1"))
# Resignations conc
plot(t6$period, t6$Treatment, type="o", ylab="Mean", xlab="Quarter", xaxt="n", ylim=c(0,5), lwd=2, lty=3, main="Resignations of \n civil servants",cex.main=1.5)
lines(t6$period, t6$Control, type="o", lwd=2)
axis(side=1, at=c(1:3),labels=c("Q15", "Q16", "Q1"))
legend(-3,-8, col=c("black"), ncol=2, lty=c(1,3), cex=1, lwd=2, c("Incumbent wins", "Incumbent loses"),bty="n",xpd="NA")
dev.off()

# Generate Figure A.3 (outcome means on healthcare service delivery, by election result) --------

r1 <- c(mean(e[which(e$challenger_margin>0),"acs_visits_q15"][[1]],na.rm=T), mean(e[which(e$challenger_margin<0),"acs_visits_q15"][[1]],na.rm=T),15, "ACS visits")
r2 <- c(mean(e[which(e$challenger_margin>0),"acs_visits_q16"][[1]],na.rm=T), mean(e[which(e$challenger_margin<0),"acs_visits_q16"][[1]],na.rm=T),16, "ACS visits")
r3 <- c(mean(e[which(e$challenger_margin>0),"acs_visits_q1"][[1]],na.rm=T), mean(e[which(e$challenger_margin<0),"acs_visits_q1"][[1]],na.rm=T),1, "ACS visits")

r4 <- c(mean(e[which(e$challenger_margin>0),"nurse_visits_q15"][[1]],na.rm=T), mean(e[which(e$challenger_margin<0),"nurse_visits_q15"][[1]],na.rm=T),15, "Nurse visits")
r5 <- c(mean(e[which(e$challenger_margin>0),"nurse_visits_q16"][[1]],na.rm=T), mean(e[which(e$challenger_margin<0),"nurse_visits_q16"][[1]],na.rm=T),16, "Nurse visits")
r6 <- c(mean(e[which(e$challenger_margin>0),"nurse_visits_q1"][[1]],na.rm=T), mean(e[which(e$challenger_margin<0),"nurse_visits_q1"][[1]],na.rm=T),1, "Nurse visits")

r7 <- c(mean(e[which(e$challenger_margin>0),"doctor_visits_q15"][[1]],na.rm=T), mean(e[which(e$challenger_margin<0),"doctor_visits_q15"][[1]],na.rm=T),15, "Doctor visits")
r8 <- c(mean(e[which(e$challenger_margin>0),"doctor_visits_q16"][[1]],na.rm=T), mean(e[which(e$challenger_margin<0),"doctor_visits_q16"][[1]],na.rm=T),16, "Doctor visits")
r9 <- c(mean(e[which(e$challenger_margin>0),"doctor_visits_q1"][[1]],na.rm=T), mean(e[which(e$challenger_margin<0),"doctor_visits_q1"][[1]],na.rm=T),1, "Doctor visits")

r10 <- c(mean(e[which(e$challenger_margin>0),"prenatal_checkups_q15"][[1]],na.rm=T), mean(e[which(e$challenger_margin<0),"prenatal_checkups_q15"][[1]],na.rm=T),15, "Prenatal checkups")
r11 <- c(mean(e[which(e$challenger_margin>0),"prenatal_checkups_q16"][[1]],na.rm=T), mean(e[which(e$challenger_margin<0),"prenatal_checkups_q16"][[1]],na.rm=T),16, "Prenatal checkups")
r12 <- c(mean(e[which(e$challenger_margin>0),"prenatal_checkups_q1"][[1]],na.rm=T), mean(e[which(e$challenger_margin<0),"prenatal_checkups_q1"][[1]],na.rm=T),1, "Prenatal checkups")

r13 <- c(mean(e[which(e$challenger_margin>0),"infant_checkups_q15"][[1]],na.rm=T), mean(e[which(e$challenger_margin<0),"infant_checkups_q15"][[1]],na.rm=T),15, "Infant checkups")
r14 <- c(mean(e[which(e$challenger_margin>0),"infant_checkups_q16"][[1]],na.rm=T), mean(e[which(e$challenger_margin<0),"infant_checkups_q16"][[1]],na.rm=T),16, "Infant checkups")
r15 <- c(mean(e[which(e$challenger_margin>0),"infant_checkups_q1"][[1]],na.rm=T), mean(e[which(e$challenger_margin<0),"infant_checkups_q1"][[1]],na.rm=T),1, "Infant checkups")

r16 <- c(mean(e[which(e$challenger_margin>0),"child_checkups_q15"][[1]],na.rm=T), mean(e[which(e$challenger_margin<0),"child_checkups_q15"][[1]],na.rm=T),15, "Child checkups")
r17 <- c(mean(e[which(e$challenger_margin>0),"child_checkups_q16"][[1]],na.rm=T), mean(e[which(e$challenger_margin<0),"child_checkups_q16"][[1]],na.rm=T),16, "Child checkups")
r18 <- c(mean(e[which(e$challenger_margin>0),"child_checkups_q1"][[1]],na.rm=T), mean(e[which(e$challenger_margin<0),"child_checkups_q1"][[1]],na.rm=T),1, "Child checkups")

r19 <- c(mean(e[which(e$challenger_margin>0),"pregnancies_vaccinated_q15"][[1]],na.rm=T), mean(e[which(e$challenger_margin<0),"pregnancies_vaccinated_q15"][[1]],na.rm=T),15, "Vaccines, pregnant women")
r20 <- c(mean(e[which(e$challenger_margin>0),"pregnancies_vaccinated_q16"][[1]],na.rm=T), mean(e[which(e$challenger_margin<0),"pregnancies_vaccinated_q16"][[1]],na.rm=T),16, "Vaccines, pregnant women")
r21 <- c(mean(e[which(e$challenger_margin>0),"pregnancies_vaccinated_q1"][[1]],na.rm=T), mean(e[which(e$challenger_margin<0),"pregnancies_vaccinated_q1"][[1]],na.rm=T),1, "Vaccines, pregnant women")

r22 <- c(mean(e[which(e$challenger_margin>0),"infants_vaccinated_q15"][[1]],na.rm=T), mean(e[which(e$challenger_margin<0),"infants_vaccinated_q15"][[1]],na.rm=T),15, "Vaccines, infants")
r23 <- c(mean(e[which(e$challenger_margin>0),"infants_vaccinated_q16"][[1]],na.rm=T), mean(e[which(e$challenger_margin<0),"infants_vaccinated_q16"][[1]],na.rm=T),16, "Vaccines, infants")
r24 <- c(mean(e[which(e$challenger_margin>0),"infants_vaccinated_q1"][[1]],na.rm=T), mean(e[which(e$challenger_margin<0),"infants_vaccinated_q1"][[1]],na.rm=T),1, "Vaccines, infants")

table <- as.tibble(rbind(r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15, r16, r17, r18, r19, r20, r21, r22, r23, r24))
colnames(table) <- c("Treatment", "Control", "Quarter", "Variable")
table$period <- rep(c(1:3),8)

t1 <- as.tibble(table[c(1:3),])
t2 <- as.tibble(table[c(4:6),])
t3 <- as.tibble(table[c(7:9),])
t4 <- as.tibble(table[c(10:12),])
t5 <- as.tibble(table[c(13:15),])
t6 <- as.tibble(table[c(16:18),])
t7 <- as.tibble(table[c(19:21),])
t8 <- as.tibble(table[c(22:34),])

pdf("../../plots/timeseries_healthcare.pdf", width=11, height=6)
par(mfrow=c(2,4))
par(mar=c(7.1, 4.1, 4.1, 2.1))
plot(t1$period, t1$Treatment, type="o", ylab="Mean", xlab="Quarter", xaxt="n", ylim=c(12000,16000), lwd=2, lty=3, main="Home visits by\n community health agents")
lines(t1$period, as.numeric(t1$Control), type="o", lwd=2)
axis(side=1, at=c(1:3),labels=c("Q15", "Q16", "Q1"))
plot(t2$period, t2$Treatment, type="o", ylab="Mean", xlab="Quarter", xaxt="n", ylim=c(250,450), lwd=2, lty=3, main="Home visits by\n nurses")
lines(t2$period, t2$Control, type="o", lwd=2)
axis(side=1, at=c(1:3),labels=c("Q15", "Q16", "Q1"))
plot(t3$period, t3$Treatment, type="o", ylab="Mean", xlab="Quarter", xaxt="n", ylim=c(150,300), lwd=2, lty=3, main="Home visits by\n doctors")
lines(t3$period, t3$Control, type="o", lwd=2)
axis(side=1, at=c(1:3),labels=c("Q15", "Q16", "Q1"))
plot(t7$period, t7$Treatment, type="o", ylab="Mean", xlab="Quarter", xaxt="n", ylim=c(250,350), lwd=2, lty=3, main="Vaccines up to date,\n pregnant women")
lines(t7$period, t7$Control, type="o", lwd=2)
axis(side=1, at=c(1:3),labels=c("Q15", "Q16", "Q1"))
plot(t4$period, t4$Treatment, type="o", ylab="Mean", xlab="Quarter", xaxt="n", ylim=c(250,600), lwd=2, lty=3, main="Prenatal care\n check-ups")
lines(t4$period, t4$Control, type="o", lwd=2)
axis(side=1, at=c(1:3),labels=c("Q15", "Q16", "Q1"))
plot(t5$period, t5$Treatment, type="o", ylab="Mean", xlab="Quarter", xaxt="n",  ylim=c(100, 250),lwd=2, lty=3, main="Medical consultations,\n infants")
lines(t5$period, t5$Control, type="o", lwd=2)
axis(side=1, at=c(1:3),labels=c("Q15", "Q16", "Q1"))
plot(t6$period, t6$Treatment, type="o", ylab="Mean", xlab="Quarter", xaxt="n", ylim=c(200,600), lwd=2, lty=3, main="Medical consultations,\n children")
lines(t6$period, t6$Control, type="o", lwd=2)
axis(side=1, at=c(1:3),labels=c("Q15", "Q16", "Q1"))
plot(t8$period, t8$Treatment, type="o", ylab="Mean", xlab="Quarter", xaxt="n", ylim=c(550,750), lwd=2, lty=3, main="Vaccines up to date,\n infants")
lines(t8$period, t8$Control, type="o", lwd=2)
axis(side=1, at=c(1:3),labels=c("Q15", "Q16", "Q1"))
legend(-4,450, col=c("black"), ncol=2, lty=c(1,3), cex=1, lwd=2, c("Incumbent wins", "Incumbent loses"),bty="n",xpd="NA")
dev.off()

# Generate Table A.2 (coverage of RAIS datasets) --------------------------

years <- c(2017, 2016, 2013, 2012, 2009, 2008, 2005, 2004)
municipalities <- rep(NA,length(years))
contracts <- rep(NA,length(years))
share_concurso <- rep(NA,length(years))

for(i in 1:length(years)){
  m <- read_csv(paste0("../../datasets/downloaded/employment/rais_processed/rais_municipal_employees_", years[i], ".csv"))
  municipalities[i] <- n_distinct(m$cod_ibge)
  contracts[i] <- nrow(m)
  share_concurso[i] <- mean(m$job_type %in% c(30,31))
}
muns_in_ibge <- c(5570, 5570, 5570, 5565, 5565, 5565, 5564, 5560)
muns_in_ibge <- muns_in_ibge - 2 # Brasilia and Fernando de Noronha do not have municipal elections
percent_in_rais <- municipalities/muns_in_ibge*100
results <- as.data.frame(cbind(municipalities, percent_in_rais, contracts, share_concurso))
results$municipalities <- as.character(results$municipalities)
results$percent_in_rais <- round(results$percent_in_rais,2)
results$contracts <- round(results$contracts/1000000,2) # express in millions
results$share_concurso <- round(results$share_concurso,2)
rownames(results) <- as.character(years)
colnames(results) <- c("Number of municipalities", "% of total", "Millions of contracts", "Share civil service")

print(file="../../tables/rais_anonymized_overview.tex",
      only.contents=TRUE,
      type="latex",
      xtable(results, align=c("lcccc")),caption.placement="top", rowname=results$Year, comment=F,floating=F,booktabs=T)

# Generate Table A.3 (characterize municipalities with close elections) --------

e$within_bandwidth_15 <- ifelse(abs(e$challenger_margin)<0.15,1,0)
e$within_bandwidth_10 <- ifelse(abs(e$challenger_margin)<0.1,1,0)
e$within_bandwidth_15_all <- ifelse(is.na(e$within_bandwidth_15),0, e$within_bandwidth_15)
e$within_bandwidth_10_all <- ifelse(is.na(e$within_bandwidth_10),0, e$within_bandwidth_10)

s1 <- lm(within_bandwidth_15 ~ log_population + log_gdp_per_capita + deaths_per_thousand + north + south + southeast + centerwest + as.factor(year), data=e)
s2 <- lm(within_bandwidth_10 ~ log_population + log_gdp_per_capita + deaths_per_thousand + north + south + southeast + centerwest + as.factor(year), data=e)
s3 <- lm(within_bandwidth_15_all ~ log_population + log_gdp_per_capita + deaths_per_thousand + north + south + southeast + centerwest + as.factor(year), data=e)
s4 <- lm(within_bandwidth_10_all ~ log_population + log_gdp_per_capita + deaths_per_thousand + north + south + southeast + centerwest + as.factor(year), data=e)

texreg(list(s1,s2,s3,s4),
       file="../../tables/rdd_sample.tex", table=T, float.pos="htp", scalebox=.9,
       digits=3,
       override.se = list(coeftest(s1, type="HC2")[,2], coeftest(s2, type="HC2")[,2], coeftest(s3, type="HC2")[,2], coeftest(s4, type="HC2")[,2]),
       override.pvalues = list(coeftest(s1, type="HC2")[,4], coeftest(s2, type="HC2")[,4], coeftest(s3, type="HC2")[,4], coeftest(s4, type="HC2")[,4]),
       booktabs = T, use.packages=F,
       custom.model.names = c("Mayor runs, \n 15 points", "Mayor runs, \n 10 points", "All, \n 15 points", "All, \n 10 points"),
       custom.coef.map = list("log_population" = "Population (logged)", "log_gdp_per_capita" = "GDP per capita (logged)", "deaths_per_thousand" = "Deaths per thousand", "north" = "North", "south" = "South", "southeast" = "Southeast", "centerwest" = "Center-west", "(Intercept)" = "Constant"),
       custom.gof.rows = list("Election fixed effects" = c("Yes", "Yes", "Yes", "Yes")),
       custom.gof.names = c("R-squared", "Observations"),
       reorder.gof = c(1, 3, 2),
       include.adjrs = F, 
       stars = c(0.05, 0.01, 0.001),
       groups = list("Region fixed effects" = 4:7),
       custom.note = paste("\\item %stars. HC2 standard errors in brackets."),
       caption="Characterization of the regression discontinuity effective sample",
       label="tab:rdd_sample",
       custom.coef.names=c("Constant", NA))

# Generate Figure A.1 (characterize municipalities not reporting data to RAIS) --------

rais2016 <- read_csv("../../datasets/downloaded/employment/rais_processed/rais_municipal_employees_2016.csv")
muns_in_rais <- unique(rais2016$cod_ibge)

all_muns <- e[which(e$year==2016),"cod_ibge"][[1]]
muns_not_in_rais <- sort(setdiff(all_muns, muns_in_rais)) 
muns_not_in_rais <- muns_not_in_rais[!muns_not_in_rais %in% c(530010,260545)] # remove Brasilia and Fernando de Noronha which do not have municipal government

m_all <- subset(e, e$year==2016)
m_missing_2016 <- subset(e, e$year==2016 & e$cod_ibge %in% muns_not_in_rais)

pdf("../../plots/rais_missing_2016.pdf",width=9,height=3)
par(mar=c(7.1, 4.1, 4.1, 2.1), mfrow=c(1,3))
plot(density(log(m_missing_2016$population),na.rm=T),main="",xlab="Population (logged, 2016)",lwd=2,lty=3,xlim=c(6.5,13))
lines(density(log(m_all$population),na.rm=T),lwd=2)
plot(density(log(m_missing_2016$gdp_per_capita),na.rm=T),main="",xlab="GDP per capita (logged, 2016)",lwd=2,lty=3,xlim=c(7.8,13))
lines(density(log(m_all$gdp_per_capita),na.rm=T),lwd=2)
plot(density(m_missing_2016$human_development_index,na.rm=T),main="",xlab="Human development index (2010)",lwd=2,lty=3,xlim=c(0.35,0.9))
lines(density(m_all$human_development_index,na.rm=T),lwd=2)               
legend(-0.65,-3, c("All municipalities", "Municipalities not in RAIS 2016"), lwd=2, lty=c(1,3),bty="n",xpd="NA", ncol=2)
dev.off()

# Generate Table A.26 (characterize municipalities by share of healthcare professionals in the civil service) -------

s1 <- lm(hcs_all ~ log_population + log_gdp_per_capita + deaths_per_thousand + north + south + southeast + centerwest + as.factor(year), data=e)
s2 <- lm(ht_all ~ log_population + log_gdp_per_capita + deaths_per_thousand + north + south + southeast + centerwest + as.factor(year), data=e)
s3 <- lm(share_civilservice ~ log_population + log_gdp_per_capita + deaths_per_thousand + north + south + southeast + centerwest + as.factor(year), data=e)

texreg(list(s1,s2,s3),
       file="../../tables/share_healthcareprofs_cs.tex", table=T, float.pos="htp", scalebox=.9,
       digits=3,
       override.se = list(coeftest(s1, type="HC2")[,2], coeftest(s2, type="HC2")[,2], coeftest(s3, type="HC2")[,2]),
       override.pvalues = list(coeftest(s1, type="HC2")[,4], coeftest(s2, type="HC2")[,4], coeftest(s3, type="HC2")[,2]),
       booktabs = T, use.packages=F,
       custom.model.names = c("All civil servants", "All temporaries", "Share civil servants"),
       custom.coef.map = list("log_population" = "Population (logged)", "log_gdp_per_capita" = "GDP per capita (logged)", "deaths_per_thousand" = "Deaths per thousand", "north" = "North", "south" = "South", "southeast" = "Southeast", "centerwest" = "Center-west", "(Intercept)" = "Constant"),
       custom.gof.rows = list("Election cycle fixed effects" = c("Yes", "Yes", "Yes")),
       custom.gof.names = c("R-squared", "Observations"),
       reorder.gof = c(1, 3, 2),
       include.adjrs = F, 
       stars = c(0.05, 0.01, 0.001),
       groups = list("Region fixed effects" = 4:7),
       custom.note = paste("\\item %stars. See Appendix E for variable definitions and sources. HC2 standard errors in brackets."),
       caption="Characterization of municipalities by their share of healthcare professionals in the civil service",
       label="tab:rdd_sample",
       custom.coef.names=c("Constant", NA))

# Notes: R version, platform, and loaded packages -----------------------

sessionInfo(package = NULL)

# R version 4.2.1 (2022-06-23)
# Platform: aarch64-apple-darwin20 (64-bit)
# Running under: macOS Monterey 12.1
# 
# Matrix products: default
# LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
# 
# locale:
#   [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
# 
# attached base packages:
#   [1] stats     graphics  grDevices utils     datasets  methods   base     
# 
# other attached packages:
#   [1] lmtest_0.9-40   zoo_1.8-10      xtable_1.8-4    texreg_1.38.6   here_1.0.1     
# [6] rvest_1.0.3     polite_0.1.2    forcats_0.5.2   stringr_1.5.0   dplyr_1.1.2    
# [11] purrr_0.3.4     readr_2.1.2     tidyr_1.2.0     tibble_3.2.1    ggplot2_3.3.6  
# [16] tidyverse_1.3.2
# 
# loaded via a namespace (and not attached):
#   [1] tidyselect_1.2.0    lattice_0.20-45     haven_2.5.1         gargle_1.2.0       
# [5] labelled_2.9.1      colorspace_2.0-3    vctrs_0.6.2         generics_0.1.3     
# [9] usethis_2.1.6       utf8_1.2.3          rlang_1.1.1         pillar_1.9.0       
# [13] glue_1.6.2          withr_2.5.0         DBI_1.1.3           dbplyr_2.2.1       
# [17] modelr_0.1.9        readxl_1.4.1        lifecycle_1.0.3     munsell_0.5.0      
# [21] gtable_0.3.1        cellranger_1.1.0    memoise_2.0.1       tzdb_0.3.0         
# [25] fastmap_1.1.0       fansi_1.0.4         broom_1.0.1         scales_1.2.1       
# [29] backports_1.4.1     googlesheets4_1.0.1 cachem_1.0.6        jsonlite_1.8.0     
# [33] fs_1.5.2            hms_1.1.2           stringi_1.7.12      codebook_0.9.2     
# [37] rprojroot_2.0.3     grid_4.2.1          cli_3.6.1           tools_4.2.1        
# [41] magrittr_2.0.3      crayon_1.5.1        pkgconfig_2.0.3     robotstxt_0.7.13   
# [45] ellipsis_0.3.2      xml2_1.3.3          reprex_2.0.2        googledrive_2.0.0  
# [49] lubridate_1.8.0     assertthat_0.2.1    httr_1.4.4          rstudioapi_0.14    
# [53] ratelimitr_0.4.1    R6_2.5.1            compiler_4.2.1     